Female Nobel Laureates

R
Visuals
2022
Published

June 1, 2022

Get Data

The Nobel Prize organization provides a public API that returns the raw laureate data used in this post.

# Get laureate data from the Nobel Prize API (v2.1).
# NOTE(review): limit=1000 caps the number of records returned; raise it or
# page through the results if the laureate count approaches 1000.
res1 <- GET("https://api.nobelprize.org/2.1/laureates?limit=1000")
# Fail fast on an HTTP error instead of parsing an error page as JSON
stop_for_status(res1)
json_laureate <- fromJSON(rawToChar(res1$content))

# The laureate records sit under the `laureates` element of the payload
laureate <- json_laureate$laureates

laureate %>% glimpse()
Rows: 981
Columns: 23
$ id                <chr> "745", "102", "779", "259", "1004", "114", "982", "9…
$ knownName         <df[,3]> <data.frame[26 x 3]>
$ givenName         <df[,3]> <data.frame[26 x 3]>
$ familyName        <df[,3]> <data.frame[26 x 3]>
$ fullName          <df[,3]> <data.frame[26 x 3]>
$ fileName          <chr> "spence", "bohr", "ciechanover", "klug", "gurnah"…
$ gender            <chr> "male", "male", "male", "male", "male", "male", "…
$ birth             <df[,2]> <data.frame[26 x 2]>
$ wikipedia         <df[,2]> <data.frame[26 x 2]>
$ wikidata          <df[,2]> <data.frame[26 x 2]>
$ sameAs            <list> <"https://www.wikidata.org/wiki/Q157245", "https:…
$ links             <list> [<data.frame[2 x 6]>], [<data.frame[2 x 6]>], [<d…
$ nobelPrizes       <list> [<data.frame[1 x 12]>], [<data.frame[1 x 12]>], [<da…
$ death             <df[,2]> <data.frame[26 x 2]>
$ orgName           <df[,3]> <data.frame[26 x 3]>
$ acronym           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ founded           <df[,2]> <data.frame[26 x 2]>
$ nativeName        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penName           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penNameOf         <df[,1]> <data.frame[26 x 1]>
$ foundedCountry    <df[,3]> <data.frame[26 x 3]>
$ foundedCountryNow <df[,3]> <data.frame[26 x 3]>
$ foundedContinent  <df[,1]> <data.frame[26 x 1]>

Data Munging

Award Winner - Names

# Create one row per Nobel laureate with names, gender, and birth details.
df_laureate <- laureate %>%
  # The name columns and `birth` are nested data frames. Unpacking them
  # produces several columns called `en`; tidyr_legacy de-duplicates them as
  # en, en1, en2, which the rename below maps to first, last, and full name.
  unnest(c(fullName, givenName, familyName, birth), names_repair = tidyr_legacy) %>%
  select(id, en, en1, en2, gender, date, place) %>%
  rename(
    last_name = "en1",
    first_name = "en",
    full_name = "en2",
    birth_date = "date"
  ) %>%
  # `place` is itself nested; unpack it, then its present-day city and country
  unnest(place) %>%
  # Columns are wrapped in c(): passing them through `...` is deprecated
  unnest(c(cityNow, countryNow), names_repair = tidyr_legacy) %>%
  select(id, full_name, first_name, last_name, birth_date, gender, en, en1) %>%
  rename(
    birth_city = "en",
    birth_country = "en1"
  )

df_laureate %>% glimpse()
Rows: 981
Columns: 8
$ id            <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ full_name     <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name    <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name     <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date    <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender        <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city    <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…

Award Categories

# Create one row per awarded Nobel Prize (a laureate may have several).
df_prize <- laureate %>%
  select(id, nobelPrizes) %>%
  # `names_repair` is the actual argument name; `repair` is not an argument of
  # unnest() and would be swallowed by the deprecated `...`
  unnest(nobelPrizes, names_repair = "universal") %>%
  select(id, awardYear, category) %>%
  # `category` is a nested data frame of translations; keep the English label
  unnest(category) %>%
  select(id, awardYear, en) %>%
  rename(laureate_id = "id", award_year = "awardYear", category = "en")

df_prize %>% glimpse()
Rows: 989
Columns: 3
$ laureate_id <chr> "745", "102", "779", "259", "1004", "114", "982", "981", "…
$ award_year  <chr> "2001", "1975", "2004", "1982", "2021", "1979", "2019", "2…
$ category    <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry", …

Join the Data

# Attach laureate details to every prize row (prize laureate_id -> laureate id),
# turn the award year into an integer, and add a constant `count` of 1 per row
# for later summing.
df_prize_laureate <- df_prize %>%
  left_join(df_laureate, by = c("laureate_id" = "id")) %>%
  mutate(
    award_year = as.integer(award_year),
    count = 1
  )

glimpse(df_prize_laureate)
Rows: 989
Columns: 11
$ laureate_id   <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ award_year    <int> 2001, 1975, 2004, 1982, 2021, 1979, 2019, 2019, 2009, 20…
$ category      <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry"…
$ full_name     <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name    <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name     <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date    <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender        <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city    <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…
$ count         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…

Reshape the Data

# Reshape to one row per category and award year, classifying each prize by
# the gender mix of its recipients.
df_grouping <- df_prize_laureate %>%
  # Add category/year combinations with no award so they still get a tile.
  # The grid runs to the latest award year in the data rather than a
  # hard-coded end year.
  complete(
    category = unique(df_prize_laureate$category),
    award_year = 1901:max(df_prize_laureate$award_year, na.rm = TRUE)
  ) %>%
  group_by(category, award_year) %>%
  # The sums are NA for filled-in rows and whenever a gender in the group is
  # missing; `grouping` is then NA as well and is drawn with the NA colour.
  summarize(
    total_count = sum(count),
    male_count = sum(count[gender == "male"]),
    female_count = sum(count[gender == "female"]),
    .groups = "drop"
  ) %>%
  mutate(
    grouping = case_when(
      female_count == total_count ~ "Female",
      male_count == total_count ~ "Male",
      female_count > 0 ~ "Mixed Team"
    ),
    # floor, not round: 1906 belongs to the 1900s, not the 1910s
    award_decade = floor(award_year / 10) * 10,
    # Facet label for the last period follows the latest year in the data
    year_split = case_when(
      award_year >= 1981 ~ paste0("1981-", max(award_year)),
      award_year >= 1941 ~ "1941-1980",
      award_year >= 1901 ~ "1901-1940"
    )
  )

# Categories in display order
category_list <- c(
  "Physiology or Medicine", "Physics", "Chemistry",
  "Literature", "Peace", "Economic Sciences"
)
# Fix the factor order explicitly (reversed for the y axis); Economic Sciences
# was not introduced until later, so it goes last in the list
df_grouping$category <- factor(df_grouping$category, levels = rev(category_list))

# Fill palette. It is unnamed, so colours are matched to the `grouping` values
# by position -- presumably Female, Male, Mixed Team; verify against the legend.
pal <- c("#D90368", "#2274A5", "#F1C40F")

df_grouping %>% glimpse()
Rows: 732
Columns: 8
Groups: category [6]
$ category     <fct> Chemistry, Chemistry, Chemistry, Chemistry, Chemistry, Ch…
$ award_year   <int> 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 191…
$ total_count  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, NA, NA, 1, N…
$ male_count   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 1, NA, NA, 1, N…
$ female_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, NA, NA, 0, N…
$ grouping     <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "…
$ award_decade <dbl> 1900, 1900, 1900, 1900, 1900, 1910, 1910, 1910, 1910, 191…
$ year_split   <chr> "1901-1940", "1901-1940", "1901-1940", "1901-1940", "1901…

Visualize

# Tile plot: one tile per category and year, coloured by recipient gender mix,
# with one facet row per period in `year_split`.
g1 <- ggplot(df_grouping, aes(x = award_year, y = category, fill = grouping)) +
  geom_tile(color = "white", width = .9, height = .9) +
  # Tiles whose `grouping` is NA are drawn in grey
  scale_fill_manual(values = pal, na.value = "grey85",
                    guide = guide_legend(title.position = "top", title.hjust = 0.5)) +
  facet_wrap(~ year_split, ncol = 1, scales = "free_x") +
  # Spelling fixed in the user-facing text: the prize is "Nobel", not "Noble"
  labs(title = "Nobel Prize Laureates",
       caption = "Data from Nobel Prize API",
       subtitle = 'Note: Some categories and years have more than one recipient, \n "Mixed" denotes a team with male and female laureates',
       x = "Year",
       y = "Category",
       fill = "Recipient Gender") +
  # Start from an empty theme and add back only the elements that are wanted
  theme_void() +
  theme(text = element_text(),
        legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 20, vjust = 5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, vjust = 6, size = 12),
        #axis.title.x = element_text(family = "nunito"),
        #axis.text = element_text(family = "nunito"),
        axis.text.y = element_text(hjust = 1, size = 10),
        strip.text.x = element_text(size = 12),
        plot.caption = element_text(size = 10, hjust = 0.95),
        plot.margin = unit(c(1.1, 0.8, 0.8, 0.8), "cm"),
        legend.spacing.x = unit(0.8, "cm"),
        legend.box.margin = margin(0, 0, 0.25, 0))

g1

Awarded Women List

# Table of every prize awarded to a woman, oldest award first.
# `Country` is the laureate's birth country.
df_prize_laureate %>%
  filter(gender == "female") %>%
  select(full_name, birth_country, award_year) %>%
  arrange(award_year) %>%
  # Column label typo fixed: "Fulll Name" -> "Full Name"
  rename("Full Name" = full_name, "Country" = birth_country, "Year Awarded" = award_year) %>%
  gt() %>%
  tab_header(title = md("**Nobel Laureate Women**"))
Nobel Laureate Women
Fulll Name Country Year Awarded
Marie Curie, née Sklodowska Poland 1903
Baroness Bertha Sophie Felicita von Suttner, née Countess Kinsky von Chinic und Tettau Czech Republic 1905
Selma Ottilia Lovisa Lagerlöf Sweden 1909
Marie Curie, née Sklodowska Poland 1911
Grazia Deledda Italy 1926
Sigrid Undset Denmark 1928
Jane Addams USA 1931
Irène Joliot-Curie France 1935
Pearl Buck USA 1938
Gabriela Mistral Chile 1945
Emily Greene Balch USA 1946
Gerty Theresa Cori, née Radnitz Czech Republic 1947
Maria Goeppert Mayer Poland 1963
Dorothy Crowfoot Hodgkin Egypt 1964
Nelly Sachs Germany 1966
Elizabeth Williams Northern Ireland 1976
Mairead Corrigan Northern Ireland 1976
Rosalyn Yalow USA 1977
Mother Teresa North Macedonia 1979
Alva Myrdal Sweden 1982
Barbara McClintock USA 1983
Rita Levi-Montalcini Italy 1986
Gertrude B. Elion USA 1988
Aung San Suu Kyi Myanmar 1991
Nadine Gordimer South Africa 1991
Rigoberta Menchú Tum Guatemala 1992
Toni Morrison USA 1993
Christiane Nüsslein-Volhard Germany 1995
Wislawa Szymborska Poland 1996
Jody Williams USA 1997
Shirin Ebadi Iran 2003
Elfriede Jelinek Austria 2004
Linda B. Buck USA 2004
Wangari Muta Maathai Kenya 2004
Doris Lessing Iran 2007
Françoise Barré-Sinoussi France 2008
Ada E. Yonath Israel 2009
Carol W. Greider USA 2009
Elinor Ostrom USA 2009
Elizabeth H. Blackburn Australia 2009
Herta Müller Romania 2009
Ellen Johnson Sirleaf Liberia 2011
Leymah Gbowee Liberia 2011
Tawakkol Karman Yemen 2011
Alice Munro Canada 2013
Malala Yousafzai Pakistan 2014
May-Britt Moser Norway 2014
Svetlana Alexievich Ukraine 2015
Tu Youyou China 2015
Donna Strickland Canada 2018
Frances H. Arnold USA 2018
Nadia Murad Basee Taha Iraq 2018
Olga Tokarczuk Poland 2018
Esther Duflo France 2019
Andrea Ghez USA 2020
Emmanuelle Charpentier France 2020
Jennifer A. Doudna USA 2020
Louise Glück USA 2020
Maria Ressa Philippines 2021
Annie Ernaux France 2022
Carolyn R. Bertozzi USA 2022

Female Laureates Count by Country

# Number of prizes awarded to women per birth country, largest count first.
df_prize_laureate %>%
  filter(gender == "female") %>%
  select(full_name, birth_country, award_year) %>%
  # Group directly on the renamed column
  group_by(Country = birth_country) %>%
  summarise("Country Count" = n()) %>%
  arrange(desc(`Country Count`)) %>%
  gt() %>%
  tab_header(title = md("**Nobel Laureate Women by Country**"))
Nobel Laureate Women by Country
Country Country Count
USA 16
France 5
Poland 5
Canada 2
Czech Republic 2
Germany 2
Iran 2
Italy 2
Liberia 2
Northern Ireland 2
Sweden 2
Australia 1
Austria 1
Chile 1
China 1
Denmark 1
Egypt 1
Guatemala 1
Iraq 1
Israel 1
Kenya 1
Myanmar 1
North Macedonia 1
Norway 1
Pakistan 1
Philippines 1
Romania 1
South Africa 1
Ukraine 1
Yemen 1